{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 4.2 PyTorch自动求导"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "pycharm": {
     "is_executing": true
    }
   },
   "outputs": [],
   "source": [
    "import torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "t_c = [0.5,  14.0, 15.0, 28.0, 11.0,  8.0,  3.0, -4.0,  6.0, 13.0, 21.0]\n",
    "t_u = [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]\n",
    "t_c = torch.tensor(t_c)\n",
    "t_u = torch.tensor(t_u)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def model(t_u, w, b):\n",
    "    return w * t_u + b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def loss_fn(t_p, t_c):\n",
    "    squared_diffs = (t_p - t_c)**2\n",
    "    return squared_diffs.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "params = torch.tensor([1.0, 0.0], requires_grad=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "params.grad is None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([4517.2969,   82.6000])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "loss = loss_fn(model(t_u, *params), t_c)\n",
    "loss.backward()\n",
    "params.grad"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "if params.grad is not None:\n",
    "    params.grad.zero_()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def training_loop(n_epochs, learning_rate, params, t_u, t_c):\n",
    "    for epoch in range(1, n_epochs + 1):\n",
    "        if params.grad is not None: \n",
    "            params.grad.zero_() # 这可以在调用backward之前在循环中的任何时候完成\n",
    "        t_p = model(t_u, *params)\n",
    "        loss = loss_fn(t_p, t_c)\n",
    "        loss.backward()\n",
    "        params = (params - learning_rate * params.grad).detach().requires_grad_()\n",
    "        if epoch % 500 == 0:\n",
    "            print('Epoch %d, Loss %f' % (epoch, float(loss)))\n",
    "    return params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 500, Loss 7.860116\n",
      "Epoch 1000, Loss 3.828538\n",
      "Epoch 1500, Loss 3.092191\n",
      "Epoch 2000, Loss 2.957697\n",
      "Epoch 2500, Loss 2.933134\n",
      "Epoch 3000, Loss 2.928648\n",
      "Epoch 3500, Loss 2.927830\n",
      "Epoch 4000, Loss 2.927679\n",
      "Epoch 4500, Loss 2.927652\n",
      "Epoch 5000, Loss 2.927647\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "tensor([  5.3671, -17.3012], requires_grad=True)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "t_un = 0.1 * t_u\n",
    "training_loop(\n",
    "    n_epochs = 5000,\n",
    "    learning_rate = 1e-2,\n",
    "    params = torch.tensor([1.0, 0.0], requires_grad=True),\n",
    "    t_u = t_un,\n",
    "    t_c = t_c)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4.2.1 优化器"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import torch.optim as optim"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['ASGD',\n",
       " 'Adadelta',\n",
       " 'Adagrad',\n",
       " 'Adam',\n",
       " 'AdamW',\n",
       " 'Adamax',\n",
       " 'LBFGS',\n",
       " 'Optimizer',\n",
       " 'RMSprop',\n",
       " 'Rprop',\n",
       " 'SGD',\n",
       " 'SparseAdam',\n",
       " '__builtins__',\n",
       " '__cached__',\n",
       " '__doc__',\n",
       " '__file__',\n",
       " '__loader__',\n",
       " '__name__',\n",
       " '__package__',\n",
       " '__path__',\n",
       " '__spec__',\n",
       " 'lr_scheduler']"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dir(optim)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "params = torch.tensor([1.0, 0.0], requires_grad=True)\n",
    "learning_rate = 1e-5\n",
    "optimizer = optim.SGD([params], lr=learning_rate)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([ 9.5483e-01, -8.2600e-04], requires_grad=True)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "t_p = model(t_u, *params)\n",
    "loss = loss_fn(t_p, t_c)\n",
    "loss.backward()\n",
    "optimizer.step()\n",
    "params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([1.7761, 0.1064], requires_grad=True)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "params = torch.tensor([1.0, 0.0], requires_grad=True)\n",
    "learning_rate = 1e-2\n",
    "optimizer = optim.SGD([params], lr=learning_rate)\n",
    "t_p = model(t_un, *params)\n",
    "loss = loss_fn(t_p, t_c)\n",
    "optimizer.zero_grad()\n",
    "loss.backward()\n",
    "optimizer.step()\n",
    "params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def training_loop(n_epochs, optimizer, params, t_u, t_c):\n",
    "    for epoch in range(1, n_epochs + 1):\n",
    "        t_p = model(t_u, *params)\n",
    "        loss = loss_fn(t_p, t_c)\n",
    "        optimizer.zero_grad()\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "        if epoch % 500 == 0:\n",
    "            print('Epoch %d, Loss %f' % (epoch, float(loss)))\n",
    "    return params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "params = torch.tensor([1.0, 0.0], requires_grad=True)\n",
    "learning_rate = 1e-2\n",
    "optimizer = optim.SGD([params], lr=learning_rate)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 500, Loss 7.860116\n",
      "Epoch 1000, Loss 3.828538\n",
      "Epoch 1500, Loss 3.092191\n",
      "Epoch 2000, Loss 2.957697\n",
      "Epoch 2500, Loss 2.933134\n",
      "Epoch 3000, Loss 2.928648\n",
      "Epoch 3500, Loss 2.927830\n",
      "Epoch 4000, Loss 2.927679\n",
      "Epoch 4500, Loss 2.927652\n",
      "Epoch 5000, Loss 2.927647\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "tensor([  5.3671, -17.3012], requires_grad=True)"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "training_loop(\n",
    "    n_epochs = 5000,\n",
    "    optimizer = optimizer,\n",
    "    params = params,\n",
    "    t_u = t_un,\n",
    "    t_c = t_c)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "params = torch.tensor([1.0, 0.0], requires_grad=True)\n",
    "learning_rate = 1e-1\n",
    "optimizer = optim.Adam([params], lr=learning_rate) # 新的优化器"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 500, Loss 7.612901\n",
      "Epoch 1000, Loss 3.086700\n",
      "Epoch 1500, Loss 2.928578\n",
      "Epoch 2000, Loss 2.927646\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "tensor([  0.5367, -17.3021], requires_grad=True)"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "training_loop(\n",
    "    n_epochs = 2000,\n",
    "    optimizer = optimizer,\n",
    "    params = params,\n",
    "    t_u = t_u, # 使用原始输入而不是t_un\n",
    "    t_c = t_c)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4.2.2 训练、验证和过拟合"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(tensor([10,  8,  1,  4,  6,  7,  0,  5,  3]), tensor([2, 9]))"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "n_samples = t_u.shape[0]\n",
    "n_val = int(0.2 * n_samples)\n",
    "\n",
    "shuffled_indices = torch.randperm(n_samples)\n",
    "train_indices = shuffled_indices[:-n_val]\n",
    "val_indices = shuffled_indices[-n_val:]\n",
    "\n",
    "train_indices, val_indices # 划分结果是随机的"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "train_t_u = t_u[train_indices]\n",
    "train_t_c = t_c[train_indices]\n",
    "\n",
    "val_t_u = t_u[val_indices]\n",
    "val_t_c = t_c[val_indices]\n",
    "\n",
    "train_t_un = 0.1 * train_t_u\n",
    "val_t_un = 0.1 * val_t_u"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def training_loop(n_epochs, optimizer, params, \n",
    "                  train_t_u, val_t_u, train_t_c, val_t_c):\n",
    "    for epoch in range(1, n_epochs + 1):\n",
    "        train_t_p = model(train_t_u, *params)\n",
    "        train_loss = loss_fn(train_t_p, train_t_c)\n",
    "        \n",
    "        val_t_p = model(val_t_u, *params)\n",
    "        val_loss = loss_fn(val_t_p, val_t_c)\n",
    "        \n",
    "        optimizer.zero_grad()\n",
    "        train_loss.backward() # 注意没有val_loss.backward因为不能在验证集上训练模型\n",
    "        optimizer.step()\n",
    "    \n",
    "        if epoch <= 3 or epoch % 500 == 0:\n",
    "            print('Epoch %d, Training loss %.2f, Validation loss %.2f' % (\n",
    "                    epoch, float(train_loss), float(val_loss)))\n",
    "    return params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1, Training loss 83.48, Validation loss 66.36\n",
      "Epoch 2, Training loss 44.31, Validation loss 14.44\n",
      "Epoch 3, Training loss 37.30, Validation loss 4.51\n",
      "Epoch 500, Training loss 7.16, Validation loss 2.26\n",
      "Epoch 1000, Training loss 3.47, Validation loss 2.71\n",
      "Epoch 1500, Training loss 3.00, Validation loss 2.90\n",
      "Epoch 2000, Training loss 2.94, Validation loss 2.98\n",
      "Epoch 2500, Training loss 2.94, Validation loss 3.00\n",
      "Epoch 3000, Training loss 2.93, Validation loss 3.01\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "tensor([  5.4100, -17.3964], requires_grad=True)"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "params = torch.tensor([1.0, 0.0], requires_grad=True)\n",
    "learning_rate = 1e-2\n",
    "optimizer = optim.SGD([params], lr=learning_rate)\n",
    "\n",
    "training_loop(\n",
    "    n_epochs = 3000,\n",
    "    optimizer = optimizer,\n",
    "    params = params,\n",
    "    train_t_u = train_t_un,\n",
    "    val_t_u = val_t_un,\n",
    "    train_t_c = train_t_c,\n",
    "    val_t_c = val_t_c)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4.2.3 不需要时关闭`autograd`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def training_loop(n_epochs, optimizer, params, \n",
    "                  train_t_u, val_t_u, train_t_c, val_t_c):\n",
    "    for epoch in range(1, n_epochs + 1):\n",
    "        train_t_p = model(train_t_u, *params)\n",
    "        train_loss = loss_fn(train_t_p, train_t_c)\n",
    "        \n",
    "        with torch.no_grad():\n",
    "            val_t_p = model(val_t_u, *params)\n",
    "            val_loss = loss_fn(val_t_p, val_t_c)\n",
    "            assert val_loss.requires_grad == False\n",
    "        \n",
    "        optimizer.zero_grad()\n",
    "        train_loss.backward()\n",
    "        optimizer.step()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def calc_forward(t_u, t_c, is_train):\n",
    "    with torch.set_grad_enabled(is_train):\n",
    "        t_p = model(t_u, *params)\n",
    "        loss = loss_fn(t_p, t_c)\n",
    "    return loss"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
